#----------------------------------------------------------------------------------------------------------------------------------*

# Replication_DempseyIonescu_Figures.R contains codes for the figures in the paper 1, 2, 3, 4, 5, A.1, A.2, A.3, A.4, and A.5

#----------------------------------------------------------------------------------------------------------------------------------*

library(tidyverse)
library(haven)
library(Hmisc)
library(xlsx)
library(odbc)
library(RcppRoll)

setwd("/data/***")

full_data <- read_dta("average_apr_calculations_closed.dta")

##########################################################################################################################
# 
# Variable generation ----------------------------------------------------------------------------------------------------
#
##########################################################################################################################

#The following variables are created or defined in either Replication_DempseyIonescu_ProbitRegression.do, Replication_DempseyIonescu_OLSRegression.do, or Replication_DempseyIonescu_Summarystats.R:

# -uniqueid
# -mean_borrower_income
# -fico_mean_2018
# -apr_mean_2018
# -mean_spread
# -one_def, two_defs, three_defs
# -bank_fe_phat
# -all_fe_three_phat
# -p_star
# -mult_flag
# -revolver
# -mean_revolved_balance
# -revolver_count
# -q1_revolve, q2_revolve, q3_revolve, q4_revolve
# -mean_balance
# -new_account

# New variables used in this script are defined below:

# -group: Even 0.5% segments of bank_fe_phat used to break up the data and calculate probabilities

# -income_quar: Quartiles of mean_borrower_income used for analysis

range_probs_fine <- c(seq(0, .195,0.005),1)

default_probs <- full_data %>%
  mutate(
    group = as.numeric(cut(bank_fe_phat, breaks = c(range_probs_fine))),
    income_quar = ntile(mean_borrower_income, 4))

#The total sum of the cycle end balance and revolved balance are calculated for charting purposes

total_end = sum(default_probs$mean_balance)
total_revolved = sum(default_probs$mean_revolved_balance)

##########################################################################################################################
#
# Figure generation ------------------------------------------------------------------------------------------------------
#
##########################################################################################################################

#In all cases, actual plotting is done in Matlab. For replication, see related Matlab scripts and pseudo-data. Alternate versions of each plot replicated in ggplot, 
#however, are included for each figure in order to indicate which particular variables are being shown.

#Figure 1 ----------------------------------------------------------------------------------------------------------------

#Figure 1a

#Mean predicted and actual default rates are calculated for each segment of predicted probability

fig_1a <- default_probs %>% group_by(group) %>%
  summarise(predicted_default = mean(bank_fe_phat),
            actual_default = mean(two_defs))

#The actual_default column is plotted on the y-axis, with the predicted_default column on the x-axis of figure 1a. A 45 degree line is also plotted for reference.

ggplot(fig_1a, aes(x = predicted_default, y = actual_default)) + geom_point() + geom_abline(slope = 1, intercept = 0)

#Figure 1b

#Mean predicted default, as well as the cumulative share of accounts, cycle-end balance, and revolved balance are calculated for each segment of predicted probability

fig_1b <- default_probs %>% group_by(group) %>%
  summarise(predicted_default = mean(bank_fe_phat),
            n_accounts = n()/nrow(default_probs),
            n_balance = sum(mean_balance, na.rm = T)/total_end,
            n_revolved = sum(mean_revolved_balance, na.rm = T)/total_revolved) %>%
  mutate(cum_accounts = cumsum(n_accounts),
         cum_balance = cumsum(n_balance),
         cum_revolved = cumsum(n_revolved))

#The column predicted_default is plotted on the x-axis, while cum_accounts, cum_balance, and cum_revolved are the three series plotted on the y-axis.

ggplot(fig_1b %>% select(predicted_default, cum_accounts, cum_balance, cum_revolved) %>% pivot_longer(-predicted_default), aes(x = predicted_default, y = value, color = name)) + geom_point()

#Figure 1c

#The mean share of accounts revolving for at least 1, 2, 3, and 4 quarters in the sample is calculated for each segment of predicted probability

fig_1c <- default_probs %>% group_by(group) %>%
  summarise(predicted_default = mean(bank_fe_phat),
            rev_q1 = mean(q1_revolve),
            rev_q2 = mean(q2_revolve),
            rev_q3 = mean(q3_revolve),
            rev_q4 = mean(q4_revolve))

#The column predicted_default is plotted on the x-axis, while rev_q1, 2, and so forth are the four series plotted on the y-axis

ggplot(fig_1c %>% select(-group) %>% pivot_longer(-predicted_default), aes(x = predicted_default, y = value, color = name)) + geom_point()

#Figure 2 ----------------------------------------------------------------------------------------------------------------

#Figure 2a

#Mean predicted and actual default rates are calculated for each segment of predicted probability, as well as the average spread weighted by the revolved balance

fig_2a <- default_probs %>% group_by(group) %>%
  summarise(pred_default = mean(bank_fe_phat),
            real_default = mean(two_defs),
            average_spread = weighted.mean(mean_spread, mean_revolved_balance))

#The first series shows average spread versus predicted default, with the second showing average spread versus actual default. The linear line of best fit is further added.

ggplot(fig_2a) + geom_point(aes(x = pred_default, y = average_spread, color = "predicted" )) + geom_line(aes(x = real_default, y = average_spread, color = "realized")) + geom_abline(slope = 0.12, intercept = .1475)

#Figure 2b takes the line from average spread versus predicted default and overlays predicted model values

#Figure 3 ----------------------------------------------------------------------------------------------------------------

#For all three figures, results from the model must be overlayed on the calculated values

#Figure 3a

#Mean predicted default rate and share revolving for at least two quarters are calculated for each segment of predicted probability, conditional on an account revolving for at least a quarter

fig_3a <- default_probs %>% filter(q1_revolve == 1) %>%
  group_by(group) %>%
  summarise(predicted_default = mean(bank_fe_phat),
            rev_q2 = mean(q2_revolve))

#The column predicted_default is plotted on the x-axis, while rev_q2 is plotted on the y-axis

ggplot(fig_3a %>% select(-group) %>% pivot_longer(-predicted_default), aes(x = predicted_default, y = value, color = name)) + geom_point()

#Figure 3b

#Mean predicted default rate and share revolving for at least three quarters are calculated for each segment of predicted probability, conditional on an account revolving for at least a quarter

fig_3b <- default_probs %>% filter(q1_revolve == 1) %>%
  group_by(group) %>%
  summarise(predicted_default = mean(bank_fe_phat),
            rev_q3 = mean(q3_revolve))

#The column predicted_default is plotted on the x-axis, while rev_q3 is plotted on the y-axis

ggplot(fig_3b %>% select(-group) %>% pivot_longer(-predicted_default), aes(x = predicted_default, y = value, color = name)) + geom_point()

#model data must also be added

#Figure 3c

#Mean predicted default rate and share revolving for at least four quarters are calculated for each segment of predicted probability, conditional on an account revolving for at least a quarter

fig_3c <- default_probs %>% filter(q1_revolve == 1) %>%
  group_by(group) %>%
  summarise(predicted_default = mean(bank_fe_phat),
            rev_q4 = mean(q4_revolve))

#The column predicted_default is plotted on the x-axis, while rev_q4 is plotted on the y-axis

ggplot(fig_3c %>% select(-group) %>% pivot_longer(-predicted_default), aes(x = predicted_default, y = value, color = name)) + geom_point()

#Figure 4 ----------------------------------------------------------------------------------------------------------------

#Mean predicted default rate and revolved-weighted spread are calculated for each segment of predicted probability

#All sub-figures use the same data points, with different model values which must be added after

fig_4 <- default_probs %>% group_by(group) %>%
  summarise(pred_default = mean(bank_fe_phat),
            average_spread = weighted.mean(mean_spread, mean_revolved_balance))

#The predicted_default is plotted on the x-axis, while average_spread is plotted on the y-axis for all charts in figure 4. The line of best fit is also plotted

ggplot(fig_4, aes(x = pred_default, y = average_spread)) + geom_point() + geom_smooth(method = "lm", se = FALSE)

#Figure 5 ----------------------------------------------------------------------------------------------------------------

#Mean predicted ex post default probability (referenced as p_star) and ex ante default probability, weighted by revolved balance, are calculated for each segment of default predicted probability

#All sub-figures use the same data points, with different model values which must be added after

fig_5 <- default_probs %>% group_by(group) %>%
  summarise(ex_ante = weighted.mean(bank_fe_phat, mean_revolved_balance),
            ex_post = weighted.mean(p_star, mean_revolved_balance))

#The ex-ante default is plotted on the x-axis, while ex-post default is plotted on the y-axis. The line of best fit is also plotted
ggplot(fig_5, aes(x = ex_ante, y = ex_post)) + geom_point() + geom_smooth(method = "lm", se = FALSE)


#Figure A.1 ----------------------------------------------------------------------------------------------------------------

#Mean predicted default and revolved-weighted spread are calculated for each segment of predicted probability, separated out by quartiles of income

fig_a1 <- default_probs %>% group_by(group, income_quar) %>%
  summarise(pred_default = mean(bank_fe_phat),
            average_spread = weighted.mean(mean_spread, mean_revolved_balance))

#Predicted_default is plotted on the x-axis, while average_spread is plotted on the y-axis for all series, grouped by income quartile

ggplot(fig_a1, aes(x = pred_default, y = average_spread, color = as.factor(income_quar))) + geom_point() 


#Figure A.2 ----------------------------------------------------------------------------------------------------------------

#Mean predicted default and balance-weighted spread are calculated for each segment of predicted probability, separated out by type of borrower

#First, values are calculated for revolvers, weighting values by revolved balance

fig_a21 <- default_probs %>% filter(revolver == 1) %>% group_by(group) %>%
  summarise(pred_default = mean(bank_fe_phat),
            average_spread = weighted.mean(mean_spread, mean_revolved_balance)) %>%
  mutate(key = "Revolvers, weighted") %>%
  select(-group)

#Next, values are calculated for revolvers and transactors, using the unweighted mean

fig_a22 <- default_probs %>% group_by(group, revolver) %>%
  summarise(pred_default = mean(bank_fe_phat),
            average_spread = mean(mean_spread)) %>%
  mutate(key = ifelse(revolver == 1, "Revolvers, unweighted", "Transactors, unweighted")) %>%
  ungroup() %>%
  select(-c(group, revolver))

#Finally, values are calculated for the entire sample

fig_a23 <- default_probs %>% group_by(group) %>%
  summarise(pred_default = mean(bank_fe_phat),
            average_spread = mean(mean_spread)) %>%
  mutate(key = "All, unweighted") %>%
  select(-group)

fig_a2 <- rbind(fig_a21, fig_a22, fig_a23)

#Predicted_default is plotted on the x-axis, while average_spread is plotted on the y-axis for all series, grouped by borrower type

ggplot(fig_a2, aes(x = pred_default, y = average_spread, color = key)) + geom_point() 


#Figure A.3 ----------------------------------------------------------------------------------------------------------------

#Figure A.3a

#Mean predicted and actual default using the broader, three_def version, are calculated for each subgroup of predicted two_def default

fig_a3a <- default_probs %>% group_by(group) %>%
  summarise(predicted_default = mean(all_fe_three_phat),
            actual_default = mean(two_defs))

#The predicted_default column is plotted on the y-axis, with the actual_default column plotted on the y-axis

ggplot(fig_a3a, aes(x = predicted_default, y = actual_default)) + geom_point()

#Figure A.3b

#Mean predicted default using three_def, as well as the cumulative share of accounts, cycle-end balance, and revolved balance are calculated for each segment of predicted two_def probability

fig_a3b <- default_probs %>% group_by(group) %>%
  summarise(predicted_default = mean(all_fe_three_phat),
            n_accounts = n()/nrow(default_probs),
            n_balance = sum(mean_balance, na.rm = T)/total_end,
            n_revolved = sum(mean_revolved_balance, na.rm = T)/total_revolved) %>%
  mutate(cum_accounts = cumsum(n_accounts),
         cum_balance = cumsum(n_balance),
         cum_revolved = cumsum(n_revolved))

#The column predicted_default is plotted on the x-axis, while cum_accounts, cum_balance, and cum_revolved are the three series plotted on the y-axis.

ggplot(fig_a3b %>% select(predicted_default, cum_accounts, cum_balance, cum_revolved) %>% pivot_longer(-predicted_default), aes(x = predicted_default, y = value, color = name)) + geom_point()

#Figure A.3c

#Mean predicted default using three_def and the mean share of accounts revolving for at least 1, 2, 3, and 4 quarters in the sample is calculated for each segment of predicted probability

fig_c <- default_probs %>% group_by(group) %>%
  summarise(predicted_default = mean(all_fe_three_phat),
            rev_q1 = mean(q1_revolve),
            rev_q2 = mean(q2_revolve),
            rev_q3 = mean(q3_revolve),
            rev_q4 = mean(q4_revolve),)

#The column predicted_default is the x-axis, while rev_q1, 2, and so on are the four series plotted on the y-axis

ggplot(fig_a3c %>% select(-group) %>% pivot_longer(-predicted_default), aes(x = predicted_default, y = value, color = name)) + geom_point()

#Figure A.4 ----------------------------------------------------------------------------------------------------------------

#Figure A.4a

#Mean predicted and actual default are calculated using three_defs, as well as mean spread, weighted by revolved balance

fig_a4a <- default_probs %>% group_by(group) %>%
  summarise(pred_default = mean(all_fe_three_phat),
            real_default = mean(three_defs),
            average_spread = weighted.mean(mean_spread, mean_revolved_balance))

#Predicted_default and actual_default are plotted on the the x-axis, while average_spread is plotted on the y-axis

ggplot(fig_a4a) + geom_point(aes(x = pred_default, y = average_spread, color = "predicted" )) + geom_line(aes(x = real_default, y = average_spread, color = "realized")) + geom_abline(slope = 0.1, intercept = .1455)

#Figure A.4b

#Mean predicted and actual default are calculated using three_defs, as well as mean spread and standard deviation, weighted by revolved balance

fig_a4b <- default_probs %>% group_by(group) %>%
  summarise(pred_default = mean(bank_fe_phat),
            average_spread = weighted.mean(mean_spread, mean_revolved_balance),
            average_spread_sd = sqrt(wtd.var(mean_spread, mean_revolved_balance))) %>%
  mutate(average_spread_u = average_spread + average_spread_sd,
         average_spread_l = average_spread - average_spread_sd)

#Predicted_default is plotted on the the x-axis, while average_spread, along with confidence intervals are plotted on the y-axis

ggplot(fig_a4b, aes(x = pred_default)) + geom_point(aes(y = average_spread)) + geom_line(aes(y = average_spread_u))  + geom_line(aes(y = average_spread_l))

